package spark_from_scratch.mllib.regression

/**
 * @author andrew
 * @email zengjunjie1026@163.com
 * @date 2020/9/3 5:18 下午
 * @version 1.0
 */
import org.apache.log4j.{Level, Logger}
import org.apache.spark.mllib.linalg
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.{SparkConf, SparkContext}



/**
 * Demo: fit a linear regression on a text data set and report the training
 * mean squared error.
 *
 * Each input line is expected to look like `label,f1 f2 f3 ...`: the label and
 * the features are separated by a comma, the features by single spaces.
 */
object LinearRegressionDemo {

  /** Input used when no path is given on the command line. */
  private val DefaultInputPath = "/home/hadoop/upload/class8/lpsa.data"

  /**
   * Entry point.
   *
   * @param args optional; `args(0)` overrides the input path, otherwise
   *             [[DefaultInputPath]] is read
   */
  def main(args: Array[String]): Unit = {
    // Keep Spark/Jetty chatter off the console.
    Logger.getLogger("org.apache.spark").setLevel(Level.ERROR)
    Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.OFF)

    // Set up the execution environment (local mode, 4 worker threads).
    val conf = new SparkConf().setAppName("LinearRegressionDemo").setMaster("local[4]")
    val sc = new SparkContext(conf)

    try {
      val inputPath = args.headOption.getOrElse(DefaultInputPath)

      // Load and parse the data. Blank lines are skipped so that a trailing
      // newline in the file does not abort the job. The RDD is cached because
      // training scans it once per iteration.
      val parsedData = sc
        .textFile(inputPath)
        .filter(_.trim.nonEmpty)
        .map { line =>
          val parts = line.split(',')
          LabeledPoint(parts(0).toDouble, Vectors.dense(parts(1).split(' ').map(_.toDouble)))
        }
        .cache()

      // Build the model.
      val numIterations = 100
      val model = LinearRegressionWithSGD.train(parsedData, numIterations)

      // Evaluate the model on the training examples and compute the training error.
      val valuesAndPreds = parsedData.map { point =>
        (point.label, model.predict(point.features))
      }
      val mse = valuesAndPreds.map { case (v, p) => math.pow(v - p, 2) }.mean()
      println(s"training Mean Squared Error = $mse")
    } finally {
      // Always release the context, even when loading or training fails.
      sc.stop()
    }
  }

  /**
   * Minimal, self-contained linear-regression trainer (no intercept term).
   *
   * Closures passed to Spark inside this object deliberately use only local
   * values, so the enclosing objects never have to be serialized.
   */
  object LinearRegressionWithSGD {

    /**
     * A fitted linear model: the prediction is the dot product of `weights`
     * and the feature vector.
     *
     * @param weights one coefficient per feature
     */
    final case class Model(weights: linalg.Vector) {

      /**
       * Predicts the label for one feature vector.
       *
       * @param features feature vector; must have the same size as `weights`
       * @return the dot product of `weights` and `features`
       * @throws IllegalArgumentException if the sizes differ
       */
      def predict(features: linalg.Vector): Double = {
        require(
          features.size == weights.size,
          s"expected ${weights.size} features but found ${features.size}")
        val w = weights.toArray
        val x = features.toArray
        w.indices.foldLeft(0.0)((sum, i) => sum + w(i) * x(i))
      }
    }

    /**
     * Fits a model by full-batch gradient descent on the squared error.
     *
     * Weights start at zero. Iteration `t` (1-based) moves them against the
     * average gradient with step `stepSize / sqrt(t)`.
     *
     * @param data          training examples; must be non-empty and all of the
     *                      same dimension
     * @param numIterations number of gradient steps (0 returns all-zero weights)
     * @param stepSize      initial step size, must be positive
     * @return the fitted [[Model]]
     * @throws IllegalArgumentException on an empty data set, a negative
     *                                  iteration count or a non-positive step
     */
    def train(
        data: org.apache.spark.rdd.RDD[LabeledPoint],
        numIterations: Int,
        stepSize: Double = 1.0): Model = {
      require(numIterations >= 0, s"numIterations must be non-negative, got $numIterations")
      require(stepSize > 0.0, s"stepSize must be positive, got $stepSize")
      val numExamples = data.count()
      require(numExamples > 0, "cannot train on an empty data set")
      val dim = data.first().features.size

      val finalWeights = (1 to numIterations).foldLeft(Array.fill(dim)(0.0)) {
        (current, iteration) =>
          // Sum of (prediction - label) * x over all examples. Spark hands each
          // task its own copy of the zero array, so updating `acc` in place is safe.
          val gradientSum = data.aggregate(Array.fill(dim)(0.0))(
            (acc, point) => {
              val x = point.features.toArray
              require(x.length == dim, s"expected $dim features but found ${x.length}")
              val residual =
                x.indices.foldLeft(0.0)((sum, i) => sum + current(i) * x(i)) - point.label
              x.indices.foreach(i => acc(i) += residual * x(i))
              acc
            },
            (left, right) => {
              right.indices.foreach(i => left(i) += right(i))
              left
            }
          )
          val rate = stepSize / math.sqrt(iteration.toDouble)
          Array.tabulate(dim)(i => current(i) - rate * gradientSum(i) / numExamples)
      }

      Model(Vectors.dense(finalWeights))
    }

    /**
     * Kept only so existing references to the original stub still compile.
     * The object itself holds no weights, so this always throws, exactly as
     * the original placeholder did.
     *
     * @param features ignored
     * @throws NotImplementedError always
     */
    @deprecated("Call train(...) and use predict on the returned Model")
    def predict(features: linalg.Vector): Nothing =
      throw new NotImplementedError(
        "LinearRegressionWithSGD holds no weights; call train(...) and use the returned Model")
  }

}
